WGCNA analysis of combined UN and CR gene expression in B. rapa RILs.
Lets start with some notes on what WGCNA is doing. (For more details, see Zhang and Horvath, 2005 )
library(tidyverse)
Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
package ‘tidyr’ was built under R version 3.4.2package ‘purrr’ was built under R version 3.4.2package ‘dplyr’ was built under R version 3.4.2Conflicts with tidy packages -------------------------------------------------------------------------------
filter(): dplyr, stats
lag(): dplyr, stats
library(WGCNA)
Loading required package: dynamicTreeCut
Loading required package: fastcluster
Attaching package: ‘fastcluster’
The following object is masked from ‘package:stats’:
hclust
==========================================================================
*
* Package WGCNA 1.61 loaded.
*
* Important note: It appears that your system supports multi-threading,
* but it is not enabled within WGCNA in R.
* To allow multi-threading within WGCNA with all available cores, use
*
* allowWGCNAThreads()
*
* within R. Use disableWGCNAThreads() to disable threading if necessary.
* Alternatively, set the following environment variable on your system:
*
* ALLOW_WGCNA_THREADS=<number_of_processors>
*
* for example
*
* ALLOW_WGCNA_THREADS=4
*
* To set the environment variable in linux bash shell, type
*
* export ALLOW_WGCNA_THREADS=4
*
* before running R. Other operating systems or shells will
* have a similar command to achieve the same aim.
*
==========================================================================
Attaching package: ‘WGCNA’
The following object is masked from ‘package:stats’:
cor
library(ggplot2)
library(edgeR)
Loading required package: limma
package ‘limma’ was built under R version 3.4.2
library(magrittr)
Attaching package: ‘magrittr’
The following object is masked from ‘package:purrr’:
set_names
The following object is masked from ‘package:tidyr’:
extract
library(gplots)
Attaching package: ‘gplots’
The following object is masked from ‘package:stats’:
lowess
library(stringr)
options(stringsAsFactors = FALSE)
Get data
#box_load(162362592785)
load("~/Box Sync/BrapaNetworks/voom.gr.Rdata")
annotation <- read.csv("~/Box Sync/BrapaNetworks/Brapa_V1.5_annotated.csv.gz")
Since I fit without an intercept the p-values are meaningless. Instead, let’s calculate the coefficient of variation and take the top 10,000.
expr.data <- voom.fit.gr$coefficients
dim(expr.data)
[1] 28863 246
head(expr.data[,1:6])
pdata$groupRIL_1_CR pdata$groupRIL_1_UN pdata$groupRIL_103_CR pdata$groupRIL_103_UN
Bra000003 5.964181 5.2876764 5.9319485 5.9508297
Bra000004 4.016551 4.3311935 2.4634976 2.7827178
Bra000005 4.403616 4.2635866 4.3890932 4.4375233
Bra000006 6.223860 6.1049521 5.4889580 5.7819458
Bra000007 1.353169 1.3250627 -0.1417577 -0.3918362
Bra000008 1.475052 0.8752729 1.0509411 -0.3483965
pdata$groupRIL_104_CR pdata$groupRIL_104_UN
Bra000003 6.167282063 5.9389722
Bra000004 2.733587192 1.9124373
Bra000005 4.245502988 3.9905388
Bra000006 5.418127998 6.4033618
Bra000007 0.003458127 -0.3438070
Bra000008 1.068067786 0.7291226
colnames(expr.data) <- colnames(expr.data) %>% str_replace("pdata\\$group","")
colnames(expr.data)
[1] "RIL_1_CR" "RIL_1_UN" "RIL_103_CR" "RIL_103_UN" "RIL_104_CR" "RIL_104_UN" "RIL_113_CR"
[8] "RIL_113_UN" "RIL_115_CR" "RIL_115_UN" "RIL_12_CR" "RIL_12_UN" "RIL_123_CR" "RIL_123_UN"
[15] "RIL_124_CR" "RIL_124_UN" "RIL_131_CR" "RIL_131_UN" "RIL_136_CR" "RIL_136_UN" "RIL_143_CR"
[22] "RIL_143_UN" "RIL_146_CR" "RIL_146_UN" "RIL_147_CR" "RIL_147_UN" "RIL_15_CR" "RIL_15_UN"
[29] "RIL_150_CR" "RIL_150_UN" "RIL_154_CR" "RIL_154_UN" "RIL_155_CR" "RIL_155_UN" "RIL_16_CR"
[36] "RIL_16_UN" "RIL_164_CR" "RIL_164_UN" "RIL_166_CR" "RIL_166_UN" "RIL_171_CR" "RIL_171_UN"
[43] "RIL_174_CR" "RIL_174_UN" "RIL_175_CR" "RIL_175_UN" "RIL_176_CR" "RIL_176_UN" "RIL_182_CR"
[50] "RIL_182_UN" "RIL_183_CR" "RIL_183_UN" "RIL_184_CR" "RIL_184_UN" "RIL_187_CR" "RIL_187_UN"
[57] "RIL_190_CR" "RIL_190_UN" "RIL_193_CR" "RIL_193_UN" "RIL_198_CR" "RIL_198_UN" "RIL_199_CR"
[64] "RIL_199_UN" "RIL_2_CR" "RIL_2_UN" "RIL_201_CR" "RIL_201_UN" "RIL_204_CR" "RIL_204_UN"
[71] "RIL_205_CR" "RIL_205_UN" "RIL_206_CR" "RIL_206_UN" "RIL_207_CR" "RIL_207_UN" "RIL_208_CR"
[78] "RIL_208_UN" "RIL_21_CR" "RIL_21_UN" "RIL_211_CR" "RIL_211_UN" "RIL_212_CR" "RIL_212_UN"
[85] "RIL_213_CR" "RIL_213_UN" "RIL_215_CR" "RIL_215_UN" "RIL_222_CR" "RIL_222_UN" "RIL_223_CR"
[92] "RIL_223_UN" "RIL_225_CR" "RIL_225_UN" "RIL_228_CR" "RIL_228_UN" "RIL_229_CR" "RIL_229_UN"
[99] "RIL_23_CR" "RIL_23_UN" "RIL_232_CR" "RIL_232_UN" "RIL_234_CR" "RIL_235_CR" "RIL_235_UN"
[106] "RIL_240_CR" "RIL_240_UN" "RIL_242_CR" "RIL_242_UN" "RIL_243_CR" "RIL_243_UN" "RIL_248_CR"
[113] "RIL_248_UN" "RIL_25_CR" "RIL_25_UN" "RIL_250_CR" "RIL_250_UN" "RIL_251_CR" "RIL_251_UN"
[120] "RIL_253_CR" "RIL_253_UN" "RIL_255_CR" "RIL_255_UN" "RIL_256_CR" "RIL_256_UN" "RIL_259_CR"
[127] "RIL_259_UN" "RIL_264_CR" "RIL_264_UN" "RIL_265_CR" "RIL_265_UN" "RIL_267_CR" "RIL_267_UN"
[134] "RIL_268_CR" "RIL_268_UN" "RIL_270_CR" "RIL_270_UN" "RIL_277_CR" "RIL_277_UN" "RIL_281_CR"
[141] "RIL_281_UN" "RIL_282_CR" "RIL_282_UN" "RIL_284_CR" "RIL_284_UN" "RIL_285_CR" "RIL_285_UN"
[148] "RIL_288_CR" "RIL_288_UN" "RIL_289_CR" "RIL_289_UN" "RIL_290_CR" "RIL_290_UN" "RIL_294_CR"
[155] "RIL_294_UN" "RIL_30_CR" "RIL_30_UN" "RIL_300_CR" "RIL_300_UN" "RIL_301_CR" "RIL_301_UN"
[162] "RIL_303_CR" "RIL_303_UN" "RIL_308_CR" "RIL_308_UN" "RIL_31_CR" "RIL_31_UN" "RIL_311_CR"
[169] "RIL_318_CR" "RIL_318_UN" "RIL_325_CR" "RIL_325_UN" "RIL_329_CR" "RIL_329_UN" "RIL_332_CR"
[176] "RIL_332_UN" "RIL_337_CR" "RIL_337_UN" "RIL_339_CR" "RIL_339_UN" "RIL_340_CR" "RIL_340_UN"
[183] "RIL_341_CR" "RIL_341_UN" "RIL_344_CR" "RIL_344_UN" "RIL_346_CR" "RIL_346_UN" "RIL_347_CR"
[190] "RIL_347_UN" "RIL_353_CR" "RIL_353_UN" "RIL_354_CR" "RIL_354_UN" "RIL_355_CR" "RIL_355_UN"
[197] "RIL_357_CR" "RIL_357_UN" "RIL_359_CR" "RIL_359_UN" "RIL_36_CR" "RIL_36_UN" "RIL_360_CR"
[204] "RIL_360_UN" "RIL_363_CR" "RIL_363_UN" "RIL_373_CR" "RIL_373_UN" "RIL_376_CR" "RIL_376_UN"
[211] "RIL_380_CR" "RIL_380_UN" "RIL_39_CR" "RIL_39_UN" "RIL_42_CR" "RIL_42_UN" "RIL_46_CR"
[218] "RIL_46_UN" "RIL_53_CR" "RIL_53_UN" "RIL_60_CR" "RIL_60_UN" "RIL_63_CR" "RIL_63_UN"
[225] "RIL_65_CR" "RIL_65_UN" "RIL_66_CR" "RIL_66_UN" "RIL_69_CR" "RIL_69_UN" "RIL_7_CR"
[232] "RIL_7_UN" "RIL_70_CR" "RIL_70_UN" "RIL_73_CR" "RIL_73_UN" "RIL_76_CR" "RIL_76_UN"
[239] "RIL_80_CR" "RIL_80_UN" "RIL_89_CR" "RIL_89_UN" "RIL_9_CR" "RIL_9_UN" "RIL_93_CR"
[246] "RIL_93_UN"
summary(t(expr.data[1:6,]))
Bra000003 Bra000004 Bra000005 Bra000006 Bra000007 Bra000008
Min. :5.068 Min. :1.053 Min. :3.780 Min. :4.468 Min. :-2.49294 Min. :-2.31212
1st Qu.:5.642 1st Qu.:2.568 1st Qu.:4.322 1st Qu.:5.676 1st Qu.:-0.39216 1st Qu.:-0.08721
Median :5.796 Median :3.068 Median :4.444 Median :6.043 Median : 0.08555 Median : 0.54988
Mean :5.837 Mean :3.117 Mean :4.449 Mean :6.009 Mean : 0.08731 Mean : 0.69109
3rd Qu.:5.992 3rd Qu.:3.638 3rd Qu.:4.579 3rd Qu.:6.375 3rd Qu.: 0.54607 3rd Qu.: 1.37467
Max. :6.990 Max. :5.302 Max. :5.114 Max. :7.342 Max. : 2.50402 Max. : 5.06706
cv <- apply(expr.data,1,function(x) {
x <- x - min(x) # scale to get above 0
(sd(x)/abs(mean(x)))*100
})
summary(cv)
Min. 1st Qu. Median Mean 3rd Qu. Max.
7.339 24.937 31.260 31.611 37.199 103.272
hist((cv))
cutoff <- sort(cv,decreasing = TRUE)[10000]
expr.data <- expr.data[cv >= cutoff,]
dim(expr.data)
[1] 10000 246
Get Rob’s Data
blups2012 <- read.csv("~/Box Sync/BrapaNetworks/Brapa2012BayesHeight_blups.csv", row.names = 1)
blups2012$Line %<>% paste("RIL",.,sep="_")
blups2012 %<>% subset(select=!grepl("V1|individual|blk|trt|treat",colnames(.)))
head(blups2012)
blups.CR <- blups2012 %>% select(starts_with("Line"),starts_with("CR"),-matches("Inflection_size")) %>%
mutate(Line=str_c(Line,"_CR"))
head(blups.CR)
blups.UN <- blups2012 %>% select(starts_with("Line"),starts_with("UN"),-matches("Inflection_size")) %>%
mutate(Line=str_c(Line,"_UN"))
head(blups.CR)
transform
head(expr.data[,1:6])
RIL_1_CR RIL_1_UN RIL_103_CR RIL_103_UN RIL_104_CR RIL_104_UN
Bra000003 5.9641813 5.2876764 5.931948 5.9508297 6.1672821 5.9389722
Bra000004 4.0165508 4.3311935 2.463498 2.7827178 2.7335872 1.9124373
Bra000006 6.2238604 6.1049521 5.488958 5.7819458 5.4181280 6.4033618
Bra000008 1.4750516 0.8752729 1.050941 -0.3483965 1.0680678 0.7291226
Bra000010 2.9381829 3.1121607 3.995122 4.7775712 1.9360631 0.7947686
Bra000015 -0.2230333 0.2288016 -1.218044 -0.1893480 -0.5972133 -1.2345910
expr.data.t <- t(expr.data)
head(expr.data.t[,1:6])
Bra000003 Bra000004 Bra000006 Bra000008 Bra000010 Bra000015
RIL_1_CR 5.964181 4.016551 6.223860 1.4750516 2.9381829 -0.2230333
RIL_1_UN 5.287676 4.331193 6.104952 0.8752729 3.1121607 0.2288016
RIL_103_CR 5.931948 2.463498 5.488958 1.0509411 3.9951222 -1.2180440
RIL_103_UN 5.950830 2.782718 5.781946 -0.3483965 4.7775712 -0.1893480
RIL_104_CR 6.167282 2.733587 5.418128 1.0680678 1.9360631 -0.5972133
RIL_104_UN 5.938972 1.912437 6.403362 0.7291226 0.7947686 -1.2345910
check sample quality
gag <- goodSamplesGenes(expr.data.t, verbose = 3)
Flagging genes and samples with too many missing values...
..step 1
gag$allOK
[1] TRUE
cluster samples to look for outliers
sampleTREE <- hclust(dist(expr.data.t), method = "average")
plot(sampleTREE,cex=.6)
heatmap.2(expr.data.t,Rowv=as.dendrogram(sampleTREE), scale="col", trace="none")
The first 6 are really different, remove
bad.samples <- sampleTREE$labels[sampleTREE$order[1:6]]
bad.samples
[1] "RIL_294_CR" "RIL_235_UN" "RIL_23_UN" "RIL_93_UN" "RIL_174_CR" "RIL_337_CR"
expr.data.t <- expr.data.t[-sampleTREE$order[1:6],]
redo clustering
sampleTREE <- hclust(dist(expr.data.t), method = "average")
plot(sampleTREE,cex=.6)
heatmap.2(expr.data.t,Rowv=as.dendrogram(sampleTREE), scale="col", trace="none")
Soft thresholding
powers <- c(c(1:11), seq(from = 12, to=20, by=2))
sft <- pickSoftThreshold(expr.data.t, powerVector = powers, verbose = 5, networkType = "signed hybrid", moreNetworkConcepts = FALSE)
pickSoftThreshold: will use block size 4473.
pickSoftThreshold: calculating connectivity for given powers...
..working on genes 1 through 4473 of 10000
executing %dopar% sequentially: no parallel backend registered
..working on genes 4474 through 8946 of 10000
..working on genes 8947 through 10000 of 10000
Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
1 1 0.511 -0.709 0.767 981.00 860.0000 2180
2 2 0.946 -1.180 0.940 326.00 225.0000 1200
3 3 0.969 -1.280 0.962 149.00 74.5000 830
4 4 0.964 -1.270 0.971 82.60 30.8000 631
5 5 0.958 -1.240 0.974 52.00 14.8000 507
6 6 0.951 -1.210 0.972 35.60 7.7100 422
7 7 0.946 -1.190 0.956 25.90 4.3400 363
8 8 0.938 -1.170 0.944 19.70 2.6100 317
9 9 0.937 -1.150 0.938 15.50 1.6000 281
10 10 0.936 -1.140 0.932 12.50 1.0400 252
11 11 0.928 -1.130 0.919 10.30 0.6710 228
12 12 0.930 -1.120 0.919 8.67 0.4450 207
13 14 0.934 -1.110 0.920 6.34 0.2030 174
14 16 0.933 -1.090 0.915 4.82 0.0980 149
15 18 0.940 -1.090 0.924 3.78 0.0485 129
16 20 0.928 -1.090 0.907 3.03 0.0250 113
sizeGrWindow(9, 5)
par(mfrow = c(1,2))
cex1 <- 0.9
# Scale-free topology fit index as a fCRction of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
choose 2 This gives the best correlations with growth parameters
softPower <- 2
adjacency <- adjacency(expr.data.t, power = softPower, type= "signed hybrid")
# Turn adjacency into topological overlap
TOM <- TOMsimilarity(adjacency);
..connectivity..
..matrix multiplication..
..normalization..
..done.
dissTOM <- 1-TOM
# Call the hierarchical clustering function
geneTree <- hclust(as.dist(dissTOM), method = "average")
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9)
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
labels = FALSE, hang = 0.04)
define modules
# We like large modules, so we set the minimum module size relatively high:
minModuleSize <- 30;
# Module identification using dynamic tree cut:
dynamicMods <- cutreeDynamic(dendro = geneTree, distM = dissTOM,
deepSplit <- 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize);
..done.
table(dynamicMods)
dynamicMods
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
3456 1280 794 588 557 520 355 309 265 172 163 156 154 144 129 115 113 109 105 83 79
22 23 24 25 26 27
66 63 58 56 56 55
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
dynamicColors
black blue brown cyan darkgreen darkgrey darkorange
355 1280 794 144 66 58 56
darkred darkturquoise green greenyellow grey60 lightcyan lightgreen
79 63 557 163 113 115 109
lightyellow magenta midnightblue orange pink purple red
105 265 129 56 309 172 520
royalblue salmon tan turquoise white yellow
83 154 156 3456 55 588
# Plot the dendrogram and colors CRderneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
merge similar modules
# Calculate eigengenes
MEList <- moduleEigengenes(expr.data.t, colors = dynamicColors)
MEs <- MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss <- 1-cor(MEs);
# Cluster module eigengenes
METree <- hclust(as.dist(MEDiss), method = "average");
# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")
merge with correlation > 0.8
mean(table(merge$colors))
[1] 370.3704
compare pre and post merge
sizeGrWindow(12, 9)
#pdf(file = "Plots/geneDendro-3.pdf", wi = 9, he = 6)
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
#dev.off()
# Rename to moduleColors
moduleColors = mergedColors
# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs
correlate Eigen genes with blups
blups.cor.sig.UN <- blups.cor.UN
blups.cor.sig.UN[blups.cor.P.UN>0.05] <- NA
blups.cor.sig.UN
MEmidnightblue MEpurple MEroyalblue MEsalmon MEcyan MEgrey60 MEdarkred MEdarkgreen
UNr NA NA NA NA NA NA NA 0.3683469
UNHmax NA NA NA -0.3239761 NA NA NA -0.4067543
UNInflection_DD -0.2400879 NA NA NA NA NA NA -0.3502889
UNduration NA NA NA NA NA NA NA -0.3938934
UNSTP NA NA NA -0.3290737 NA NA NA -0.4010090
MEblue MEbrown MEturquoise MEdarkturquoise MEblack MEyellow MEdarkorange
UNr NA 0.2617989 NA -0.2970346 NA NA NA
UNHmax -0.3909326 -0.4484801 -0.2784093 NA NA NA NA
UNInflection_DD NA -0.4245941 -0.4408425 NA -0.2871692 NA NA
UNduration NA -0.4291994 -0.4289251 NA -0.3036147 NA NA
UNSTP -0.3959738 -0.4419869 -0.2688910 NA NA NA NA
MEtan MEwhite MEmagenta MEred MEgreen MEpink MElightyellow MEorange
UNr NA -0.3348373 NA -0.2757473 -0.2532181 NA NA -0.2244186
UNHmax 0.2611232 0.3433132 0.3339532 0.4241171 NA 0.4492634 NA NA
UNInflection_DD NA 0.3285286 NA 0.5233863 0.3736459 0.4317320 NA NA
UNduration NA 0.3731107 NA 0.5347621 0.4000636 0.4359083 NA NA
UNSTP 0.2648908 0.3367553 0.3392389 0.4114069 NA 0.4439670 NA NA
MElightcyan MElightgreen MEdarkgrey MEgreenyellow
UNr -0.2511686 NA NA NA
UNHmax 0.2564586 NA NA NA
UNInflection_DD 0.3174826 NA NA 0.24194
UNduration 0.3464343 NA NA NA
UNSTP 0.2558072 NA NA NA
blups.cor.sig.CR <- blups.cor.CR
blups.cor.sig.CR[blups.cor.P.CR>0.05] <- NA
blups.cor.sig.CR
MEmidnightblue MEpurple MEroyalblue MEsalmon MEcyan MEgrey60 MEdarkred MEdarkgreen
CRr NA NA NA NA 0.2965945 NA NA 0.3732141
CRHmax NA NA NA -0.2643807 NA NA NA -0.3595800
CRInflection_DD -0.2580776 NA NA NA NA NA NA -0.3344883
CRduration -0.2437922 NA NA NA NA NA NA -0.3614897
CRSTP NA NA NA -0.2658797 NA NA NA -0.3549123
MEblue MEbrown MEturquoise MEdarkturquoise MEblack MEyellow MEdarkorange MEtan
CRr 0.2208475 0.4115290 0.2598016 -0.2733151 NA NA NA NA
CRHmax -0.3410736 -0.5299146 -0.4661989 NA NA NA NA NA
CRInflection_DD -0.2325610 -0.5511279 -0.5790470 NA -0.2553127 NA NA NA
CRduration -0.2525814 -0.5605335 -0.5515244 NA -0.2499917 NA NA NA
CRSTP -0.3413666 -0.5235717 -0.4593736 NA NA NA NA NA
MEwhite MEmagenta MEred MEgreen MEpink MElightyellow MEorange MElightcyan
CRr -0.3725419 -0.3029392 -0.4891761 -0.2880703 -0.3399018 NA NA -0.3266381
CRHmax 0.3011879 0.2725302 0.6040239 0.3108611 0.4072426 NA NA 0.3077181
CRInflection_DD 0.3104218 NA 0.6704269 0.3809314 0.3350887 NA NA 0.3536802
CRduration 0.3443851 NA 0.6858383 0.3902848 0.3654567 NA NA 0.3685260
CRSTP 0.2959712 0.2722293 0.5950714 0.3026590 0.4022817 NA NA 0.3093732
MElightgreen MEdarkgrey MEgreenyellow
CRr -0.2705371 NA NA
CRHmax NA -0.2625836 NA
CRInflection_DD NA NA 0.2252131
CRduration NA NA NA
CRSTP NA -0.2680623 NA
plot UN
# Will display correlations and their p-values
par(mar = c(8, 5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = blups.cor.UN,
yLabels = names(blups.UN[-1]),
xLabels = names(MEs),
xSymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = p.asterisk.UN,
setStdMargins = FALSE,
zlim = c(-1,1),
cex.text = 2,
main = paste("UN Module-trait relationships"))
pdf("Module-trait_heatmap_UN.pdf",height = 8,width = 12)
par(mar = c(8, 5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = blups.cor.UN,
yLabels = names(blups.UN[-1]),
xLabels = names(MEs),
xSymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = p.asterisk.UN,
setStdMargins = FALSE,
zlim = c(-1,1),
main = paste("UN Module-trait relationships"))
dev.off()
quartz_off_screen
2
plot CR
# Will display correlations and their p-values
par(mar = c(8, 5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = blups.cor.CR,
yLabels = names(blups.CR[-1]),
xLabels = names(MEs),
xSymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = p.asterisk.CR,
cex.text = 2,
setStdMargins = FALSE,
zlim = c(-1,1),
main = paste("CR Module-trait relationships"))
pdf("Module-trait_heatmap_CR.pdf",height = 8,width = 12)
par(mar = c(8, 5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = blups.cor.CR,
yLabels = names(blups.CR[-1]),
xLabels = names(MEs),
xSymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = p.asterisk.CR,
setStdMargins = FALSE,
zlim = c(-1,1),
main = paste("CR Module-trait relationships"))
dev.off()
quartz_off_screen
2
Arbitrary, but let’s take the max and min for each trait (so long as they are significant)
cor.top.CR
MEbrown MEturquoise MEred
CRr 0.4115290 NA -0.4891761
CRHmax -0.5299146 NA 0.6040239
CRInflection_DD NA -0.579047 0.6704269
CRduration -0.5605335 NA 0.6858383
CRSTP -0.5235717 NA 0.5950714
cor.top.UN
MEdarkgreen MEbrown MEturquoise MEwhite MEred MEpink
UNr 0.3683469 NA NA -0.3348373 NA NA
UNHmax NA -0.4484801 NA NA NA 0.4492634
UNInflection_DD NA NA -0.4408425 NA 0.5233863 NA
UNduration NA -0.4291994 NA NA 0.5347621 NA
UNSTP NA -0.4419869 NA NA NA 0.4439670
write.csv(cor.top.CR,"Eigen_trait_cor_CR_.5_threshold.csv")
write.csv(cor.top.UN,"Eigen_trait_cor_UN_.5_threshold.csv")
write the Eigen genes
head(MEs[,colnames(cor.top.CR)])
write.csv(MEs[,colnames(cor.top.CR)],"Top_Eigen_genes_CR.csv")
head(MEs[,colnames(cor.top.UN)])
write.csv(MEs[,colnames(cor.top.UN)],"Top_Eigen_genes_UN.csv")
write all Eigen genes
write.csv(MEs,"All_Eigen_genes_combined.csv")
save WGCNA info
save(annotation,blups.cor.UN, blups.cor.CR, blups.cor.5.CR, blups.cor.5.UN, blups.cor.P.UN,blups.cor.P.CR,blups.cor.sig.UN,blups.cor.sig.CR,moduleColors,MEs,merge,expr.data.t,
file="~/Box Sync/BrapaNetworks/WGCNA_combined.Rdata")
Get GO list and gene lengths
wget http://www.g3journal.org/content/suppl/2014/08/12/g3.114.012526.DC1/FileS11.txt
wget http://jnmaloof.github.io/BIS180L_web/data/Brapa_CDS_lengths.txt
library(goseq)
Loading required package: BiasedUrn
Loading required package: geneLenDataBase
library(tidyverse)
library(stringr)
Format data for GOseq
go.terms <- read.delim("FileS11.txt",header=FALSE,as.is=TRUE)
head(go.terms)
names(go.terms) <- c("GeneID","GO")
summary(go.terms)
GeneID GO
Length:32317 Length:32317
Class :character Class :character
Mode :character Mode :character
gene.lengths <- read.table("Brapa_CDS_lengths.txt",as.is=TRUE)
head(gene.lengths)
summary(gene.lengths)
GeneID Length
Length:41020 Min. : 150
Class :character 1st Qu.: 573
Mode :character Median : 981
Mean : 1173
3rd Qu.: 1497
Max. :16227
#For this analysis the "Universe" will be all 10000 genes used as input to WGCNA, and the test set will be each module that showed a significant correlation with a FVT blup.
#we need to reduce the gene.length data to only contain entries for those genes in our universe. We also need this as a vector
gene.lengths.module <- gene.lengths %>%
semi_join(module_genes,by="GeneID")
gene.lengths.vector <- as.vector(gene.lengths.module$Length)
names(gene.lengths.vector) <- gene.lengths.module$GeneID
#Do the reverse to make sure everything matches up (it seems that we don't have length info for some genes?)
module_genes <- semi_join(module_genes,gene.lengths.module)
Joining, by = "GeneID"
Format go.terms for goseq. We want them in list format, and we need to separate the terms into separate elements.
go.list <- strsplit(go.terms$GO,split=",")
names(go.list) <- go.terms$GeneID
head(go.list)
$Bra002000
[1] "GO:0005488"
$Bra001201
[1] "GO:0000160" "GO:0009927" "GO:0000155" "GO:0018106" "GO:0005524" "GO:0004740" "GO:0005739"
$Bra001122
[1] "GO:0050832" "GO:0016023" "GO:0008061" "GO:0042742"
$Bra001115
[1] "GO:0009825" "GO:0001578" "GO:0055028" "GO:0007163" "GO:0010031" "GO:0010015" "GO:0010091"
$Bra001108
[1] "GO:0000166" "GO:0003723"
$Bra001050
[1] "GO:0070628" "GO:0043130" "GO:0005634"
Now we will loop through each significant module
First, get the sig modules
sig.modules.UN <- colnames(blups.cor.P.UN) %>%
magrittr::extract(apply(blups.cor.P.UN,2,function(x) any(x < 0.05))) %>%
str_replace("ME","")
sig.modules.CR <- colnames(blups.cor.P.CR) %>%
magrittr::extract(apply(blups.cor.P.CR,2,function(x) any(x < 0.05))) %>%
str_replace("ME","")
sig.modules <- union(sig.modules.UN,sig.modules.CR)
Format module data for goseq. We need a vector for each gene with 1 indicating module membership and 0 indicating not in module
file.remove("~/Box Sync/BrapaNetworks/_Figures_Tables_For_Paper/SupTable_JM4_WGCNA_combined_GO.csv")
cannot remove file '~/Box Sync/BrapaNetworks/_Figures_Tables_For_Paper/SupTable_JM4_WGCNA_combined_GO.csv', reason 'No such file or directory'
[1] FALSE
GO.results <- lapply(sig.modules, function(module) {
module01 <- module_genes$module %>% str_detect(module) %>% as.numeric()
names(module01) <- module_genes$GeneID
#determines if there is bias due to gene length.
nullp.result.tmp <- nullp(DEgenes = module01,bias.data = gene.lengths.vector,plot.fit = FALSE)
#calculate p-values for each GO term
GO.out.tmp <- goseq(pwf = nullp.result.tmp,gene2cat = go.list,test.cats=("GO:BP"))
#Keep CC and BP
GO.out.tmp <- GO.out.tmp[GO.out.tmp$ontology=="BP" | GO.out.tmp$ontology=="CC",]
#Calculate FDR
GO.out.tmp <- GO.out.tmp %>% as.tibble() %>%
mutate(FDR=p.adjust(over_represented_pvalue, method = "fdr"),module=module) %>%
filter(FDR < 0.05) %>%
select(module,term,ontology,FDR,over_represented_pvalue,everything())
write_csv(GO.out.tmp,path = "~/Box Sync/BrapaNetworks/_Figures_Tables_For_Paper/SupTable_JM4_WGCNA_combined_GO.csv",append = TRUE)
GO.out.tmp
})
initial point very close to some inequality constraintsUsing manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
initial point very close to some inequality constraintsUsing manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
initial point very close to some inequality constraintsUsing manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
initial point very close to some inequality constraintsUsing manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
initial point very close to some inequality constraintsUsing manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
initial point very close to some inequality constraintsUsing manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
initial point very close to some inequality constraintsUsing manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
initial point very close to some inequality constraintsUsing manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
initial point very close to some inequality constraintsUsing manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
Using manually entered categories.
For 2004 genes, we could not find any categories. These genes will be excluded.
To force their use, please run with use_genes_without_cat=TRUE (see documentation).
This was the default behavior for version 1.15.1 and earlier.
Calculating the p-values...
'select()' returned 1:1 mapping between keys and columns
names(GO.results) <- sig.modules
GO.results
$midnightblue
$salmon
$darkgreen
$blue
$brown
$turquoise
$darkturquoise
$black
$tan
$white
$magenta
$red
$green
$pink
$orange
$lightcyan
$greenyellow
$cyan
$lightgreen
$darkgrey
NA
Eigen genes are the first PC. Do these always positively track gene expression in the cluster?
Want to make plots that show gene expression and eigen genes for each “significant” cluster.
pdf("eigenplots_combined.pdf",width = 12, height = 8)
for (this.module in sig.modules) {
this.ME <- MEs %>%
select(paste0("ME",this.module)) %>%
scale() %>%
as.data.frame() %>%
rownames_to_column("RIL") %>%
rename_at(2,str_replace,"ME","")
genes.this.module <- module_genes %>% filter(module==this.module)
expression.this.module <- expr.data.t[,genes.this.module$GeneID] %>%
scale() %>%
as.data.frame() %>%
rownames_to_column("RIL") %>%
as_tibble() %>%
inner_join(this.ME) %>%
arrange(.data[[this.module]]) %>%
mutate(index=row_number()) %>%
gather(key="gene",value = "expression",-RIL, - index) %>%
mutate(eigen=gene==this.module)
expression.this.module
low.alpha <- 7/nrow(genes.this.module)
pl <- expression.this.module %>%
ggplot(aes(x=index,y=expression,group=gene)) +
geom_line(aes(alpha=eigen,color=eigen)) +
scale_alpha_discrete(range = c(low.alpha,1)) +
scale_color_manual(values=c("red","blue")) +
xlab("RIL") +
ggtitle(this.module)
print(pl)
}
Joining, by = "RIL"
dev.off()
null device
1